{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "'''\n",
    "The core data type in Numpy is the ndarray, which enables fast and space-efficient multidimensional array processing.\n",
    "Note: This notebook is adapted from chapter 4 Python for Data Analysis by Wes McKinney and O'Reilly publishing. NumPy has many, \n",
    "many features that won't be covered here. The following snippets are just to illustrate basic data types and operations within\n",
    "numpy.\n",
    "\n",
    "Another good resource for learning more about ndarrays is here:\n",
    "http://docs.scipy.org/doc/numpy/reference/arrays.html\n",
    "'''\n",
    "\n",
    "#First, import NumPy\n",
    "import numpy as np\n",
    "\n",
    "#It is easy to create Nx1 and NxM arrays from standard Python lists\n",
    "l1 = [0,1,2]\n",
    "l2 = [3,4,5]\n",
    "\n",
    "nd1 = np.array(l1)\n",
    "nd2 = np.array([l1,  l2])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "The ndarray has dimension n=3 and m=1\n",
      "The ndarray has elements of type=int64\n",
      "The ndarray has dimension n=2 and m=3\n",
      "The ndarray has elements of type=int64\n"
     ]
    }
   ],
   "source": [
    "#Now, we can get ask for some basic info to describe the ndarray\n",
    "def desc_ndarray(nd):\n",
    "    try:\n",
    "        print \"The ndarray has dimension n=%s and m=%s\" % (nd.shape[0],nd.shape[1])\n",
    "    except:\n",
    "        print \"The ndarray has dimension n=%s and m=1\" % nd.shape[0]\n",
    "    print \"The ndarray has elements of type=%s\" % nd.dtype\n",
    "\n",
    "desc_ndarray(nd1)\n",
    "\n",
    "desc_ndarray(nd2)\n",
    "\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[array([ 0.,  0.,  0.,  0.]),\n",
       " array([ 1.,  1.,  1.,  1.]),\n",
       " array([ 0.47121338,  1.83328779,  0.4438019 , -0.52309325])]"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#There are short cuts for creating certain frequently used special ndarrays, i.e.,\n",
    "\n",
    "k=4\n",
    "\n",
    "#1. an ndarray of all zeros\n",
    "zero = np.zeros(k)\n",
    "\n",
    "#2. an ndarray of all ones\n",
    "one = np.ones(k)\n",
    "\n",
    "#3. an ndarray of random elements (this one is standard normal, but there are many distributions to choose from)\n",
    "rand = np.random.randn(k)\n",
    "\n",
    "[zero, one, rand]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[array([[ 0.69394907,  0.85723722],\n",
       "        [-0.16779156,  0.41709003],\n",
       "        [-0.94008249, -0.21591983],\n",
       "        [-0.61305106,  0.41435495]]),\n",
       " array([-0.16779156,  0.41709003]),\n",
       " 0.41709003439166575]"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "'''\n",
    "For indexing an array:\n",
    "1. If nx1 array, follow the same protocol as a regular Python list\n",
    "2. If nxm array use the following examples\n",
    "'''\n",
    "\n",
    "arr2d = np.random.randn(4,2)\n",
    "\n",
    "#A single index gets a full row\n",
    "\n",
    "#2 indexes returns a value\n",
    "[arr2d, arr2d[1],  arr2d[1,1]]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[array([-0.4386254 , -0.67720483, -1.19775067, -0.21300288]),\n",
       " array([-0.8772508 , -1.35440967, -2.39550135, -0.42600575]),\n",
       " array([-0.8772508 , -1.35440967, -2.39550135, -0.42600575]),\n",
       " array([-0., -0., -0., -0.])]"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "'''\n",
    "Operations between Arrays and Scalars\n",
    "An important feature of ndarrays is they allow batch operations on data without writing any for loops.  \n",
    "This is called vectorization.\n",
    "Any arithmetic operations between equal-size arrays applies the operation elementwise. \n",
    "'''\n",
    "\n",
    "#examples\n",
    "\n",
    "k = 4\n",
    "rand = np.random.randn(k)\n",
    "[rand, rand + rand, 2*rand, rand*np.zeros(4)]\n",
    "\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[array([ 0.19631415,  0.41059714,  4.26249299]),\n",
       " array([-1.46310809,  1.15559786,  0.10690073]),\n",
       " array([-1.26679394,  1.566195  ,  4.36939372])]"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "'''\n",
    "Matrix operations\n",
    "It is easy to do matrix operations with Nd arrays. The standard arithmetic operators don't work here though. And it is important \n",
    "to make sure matrix shapes are compatible\n",
    "'''\n",
    "\n",
    "k = 3\n",
    "r1 = np.random.randn(k)\n",
    "r2 = np.random.randn(k)\n",
    "\n",
    "#Matrix addition is the standard matrix operator\n",
    "[r1, r2 , r1 + r2]\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[array([[ 0.19631415,  0.41059714,  4.26249299],\n",
       "        [-1.46310809,  1.15559786,  0.10690073]]),\n",
       " array([[ 0.19631415, -1.46310809],\n",
       "        [ 0.41059714,  1.15559786],\n",
       "        [ 4.26249299,  0.10690073]])]"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#The Transpose can be taken with the attribute T\n",
    "arr2d = np.array([r1, r2])\n",
    "[arr2d, arr2d.T]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[array([[ 0.19631415,  0.41059714,  4.26249299],\n",
       "        [-1.46310809,  1.15559786,  0.10690073]]),\n",
       " array([[  3.85392468e-02,   1.68590015e-01,   1.81688465e+01],\n",
       "        [  2.14068529e+00,   1.33540642e+00,   1.14277663e-02]]),\n",
       " array([[ 18.37597578,   0.64291997],\n",
       "        [  0.64291997,   3.48751947]])]"
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "'''\n",
    "Matrix multiplication, like inner products can be done on arrays.\n",
    "Just remember that the standard multiplication operator does elementwise multiplication (provided they are the same shape).\n",
    "We need the dot method in order to do an inner product\n",
    "\n",
    "Numpy has a linalg library that can run most matrix operations on ndarrays:\n",
    "http://docs.scipy.org/doc/numpy/reference/routines.linalg.html\n",
    "\n",
    "One can also create a matrix object and use the methods in numpy.matrix to achieve the same thing:\n",
    "http://docs.scipy.org/doc/numpy/reference/generated/numpy.matrix.html\n",
    "'''\n",
    "\n",
    "[arr2d, arr2d * arr2d, arr2d.dot(arr2d.T)]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "10000 loops, best of 3: 119 µs per loop\n"
     ]
    }
   ],
   "source": [
    "'''\n",
    "One important feature of vectorization is that it allows elementwise processing that is much faster than writing a traditional\n",
    "loop.\n",
    "'''\n",
    "import math\n",
    "\n",
    "#show an example and profile i\n",
    "%timeit [math.sqrt(x) for x in range(1000)]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "The slowest run took 9.83 times longer than the fastest. This could mean that an intermediate result is being cached \n",
      "100000 loops, best of 3: 5.19 µs per loop\n"
     ]
    }
   ],
   "source": [
    "%timeit np.sqrt(np.arange(1000))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "ERROR: Line magic function `%inline` not found.\n"
     ]
    }
   ],
   "source": [
    "'''\n",
    "The last thing we'll cover in this module is the numpy.random library. In general, it is advised to use numpy for\n",
    "random number generation as opposed to python's built in random module.\n",
    "\n",
    "Random number generation has many uses. One common use is generating fake (i.e. random) data to test modeling procedures\n",
    "or to do Monte Carlo Simulations\n",
    "'''\n",
    "import matplotlib.pyplot as plt\n",
    "%inline\n",
    "\n",
    "\n",
    "#Generate random pairs that have a multivariate normal distribution\n",
    "N = 1000\n",
    "mu = np.array([0,0])\n",
    "cov = 0.5\n",
    "sig = np.array([[1, cov],[cov, 1]]) #Must be square, symmetric and non-negative definite\n",
    "x, y = np.random.multivariate_normal(mu, sig, N).T\n",
    "#Now let's plot and see what that looks like\n",
    "\n",
    "\n",
    "plt.plot(x, y,'x'); plt.axis('equal'); plt.show()\n",
    "\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "'''\n",
    "One final example (taken from Wes Mckinney's book):\n",
    "\n",
    "Let's generate a random walk and visualize it\n",
    "'''\n",
    "import matplotlib.pyplot as plt\n",
    "\n",
    "nsteps = 1000\n",
    "draws = np.random.randint(0, 2, size = nsteps) #Randint let's us generate random integers in a range\n",
    "steps = np.where(draws>0, 1, -1) #there function let's us do boolean logic on a conditional applied to an entire array\n",
    "walk = steps.cumsum() #Cumsum returns an array with the same size as steps, that has cum sum of steps up to index i\n",
    "plt.plot(np.arange(len(walk)), walk);plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 2",
   "language": "python",
   "name": "python2"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 2
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython2",
   "version": "2.7.9"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 0
}
